{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "\n",
    "# Name of the local export directory for this notebook.\n",
    "out = 'xlnet-base-bahasa-standard-cased'\n",
    "\n",
    "# Create the directory; an already-existing directory is not an error.\n",
    "os.makedirs(name=out, exist_ok=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "from transformers import XLNetTokenizer, XLNetModel, XLNetConfig, AutoTokenizer, AutoModelWithLMHead, pipeline"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "('xlnet-base-bahasa-standard-cased/spiece.model',\n",
       " 'xlnet-base-bahasa-standard-cased/special_tokens_map.json',\n",
       " 'xlnet-base-bahasa-standard-cased/added_tokens.json')"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Build an XLNet tokenizer from the local vocabulary file; casing is preserved.\n",
    "tokenizer = XLNetTokenizer('sp10m.cased.v9.model', do_lower_case=False)\n",
    "\n",
    "# Save into the directory held in `out` (defined in the first cell) rather than\n",
    "# repeating the directory name, so the export location is set in one place.\n",
    "tokenizer.save_pretrained(out)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Reload the tokenizer from the local export directory. The path is derived\n",
    "# from `out` (defined in the first cell) and keeps the explicit './' prefix.\n",
    "tokenizer = XLNetTokenizer.from_pretrained(os.path.join('.', out), do_lower_case=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "INFO:transformers.modeling_xlnet:Loading TF weight global_step with shape []\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/lm_loss/bias with shape [32000]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/lm_loss/bias/adam_m with shape [32000]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/lm_loss/bias/adam_v with shape [32000]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_0/ff/LayerNorm/beta with shape [768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_0/ff/LayerNorm/beta/adam_m with shape [768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_0/ff/LayerNorm/beta/adam_v with shape [768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_0/ff/LayerNorm/gamma with shape [768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_0/ff/LayerNorm/gamma/adam_m with shape [768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_0/ff/LayerNorm/gamma/adam_v with shape [768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_0/ff/layer_1/bias with shape [3072]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_0/ff/layer_1/bias/adam_m with shape [3072]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_0/ff/layer_1/bias/adam_v with shape [3072]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_0/ff/layer_1/kernel with shape [768, 3072]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_0/ff/layer_1/kernel/adam_m with shape [768, 3072]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_0/ff/layer_1/kernel/adam_v with shape [768, 3072]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_0/ff/layer_2/bias with shape [768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_0/ff/layer_2/bias/adam_m with shape [768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_0/ff/layer_2/bias/adam_v with shape [768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_0/ff/layer_2/kernel with shape [3072, 768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_0/ff/layer_2/kernel/adam_m with shape [3072, 768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_0/ff/layer_2/kernel/adam_v with shape [3072, 768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_0/rel_attn/LayerNorm/beta with shape [768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_0/rel_attn/LayerNorm/beta/adam_m with shape [768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_0/rel_attn/LayerNorm/beta/adam_v with shape [768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_0/rel_attn/LayerNorm/gamma with shape [768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_0/rel_attn/LayerNorm/gamma/adam_m with shape [768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_0/rel_attn/LayerNorm/gamma/adam_v with shape [768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_0/rel_attn/k/kernel with shape [768, 12, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_0/rel_attn/k/kernel/adam_m with shape [768, 12, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_0/rel_attn/k/kernel/adam_v with shape [768, 12, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_0/rel_attn/o/kernel with shape [768, 12, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_0/rel_attn/o/kernel/adam_m with shape [768, 12, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_0/rel_attn/o/kernel/adam_v with shape [768, 12, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_0/rel_attn/q/kernel with shape [768, 12, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_0/rel_attn/q/kernel/adam_m with shape [768, 12, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_0/rel_attn/q/kernel/adam_v with shape [768, 12, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_0/rel_attn/r/kernel with shape [768, 12, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_0/rel_attn/r/kernel/adam_m with shape [768, 12, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_0/rel_attn/r/kernel/adam_v with shape [768, 12, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_0/rel_attn/v/kernel with shape [768, 12, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_0/rel_attn/v/kernel/adam_m with shape [768, 12, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_0/rel_attn/v/kernel/adam_v with shape [768, 12, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_1/ff/LayerNorm/beta with shape [768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_1/ff/LayerNorm/beta/adam_m with shape [768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_1/ff/LayerNorm/beta/adam_v with shape [768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_1/ff/LayerNorm/gamma with shape [768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_1/ff/LayerNorm/gamma/adam_m with shape [768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_1/ff/LayerNorm/gamma/adam_v with shape [768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_1/ff/layer_1/bias with shape [3072]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_1/ff/layer_1/bias/adam_m with shape [3072]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_1/ff/layer_1/bias/adam_v with shape [3072]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_1/ff/layer_1/kernel with shape [768, 3072]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_1/ff/layer_1/kernel/adam_m with shape [768, 3072]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_1/ff/layer_1/kernel/adam_v with shape [768, 3072]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_1/ff/layer_2/bias with shape [768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_1/ff/layer_2/bias/adam_m with shape [768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_1/ff/layer_2/bias/adam_v with shape [768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_1/ff/layer_2/kernel with shape [3072, 768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_1/ff/layer_2/kernel/adam_m with shape [3072, 768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_1/ff/layer_2/kernel/adam_v with shape [3072, 768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_1/rel_attn/LayerNorm/beta with shape [768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_1/rel_attn/LayerNorm/beta/adam_m with shape [768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_1/rel_attn/LayerNorm/beta/adam_v with shape [768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_1/rel_attn/LayerNorm/gamma with shape [768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_1/rel_attn/LayerNorm/gamma/adam_m with shape [768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_1/rel_attn/LayerNorm/gamma/adam_v with shape [768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_1/rel_attn/k/kernel with shape [768, 12, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_1/rel_attn/k/kernel/adam_m with shape [768, 12, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_1/rel_attn/k/kernel/adam_v with shape [768, 12, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_1/rel_attn/o/kernel with shape [768, 12, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_1/rel_attn/o/kernel/adam_m with shape [768, 12, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_1/rel_attn/o/kernel/adam_v with shape [768, 12, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_1/rel_attn/q/kernel with shape [768, 12, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_1/rel_attn/q/kernel/adam_m with shape [768, 12, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_1/rel_attn/q/kernel/adam_v with shape [768, 12, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_1/rel_attn/r/kernel with shape [768, 12, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_1/rel_attn/r/kernel/adam_m with shape [768, 12, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_1/rel_attn/r/kernel/adam_v with shape [768, 12, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_1/rel_attn/v/kernel with shape [768, 12, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_1/rel_attn/v/kernel/adam_m with shape [768, 12, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_1/rel_attn/v/kernel/adam_v with shape [768, 12, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_10/ff/LayerNorm/beta with shape [768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_10/ff/LayerNorm/beta/adam_m with shape [768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_10/ff/LayerNorm/beta/adam_v with shape [768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_10/ff/LayerNorm/gamma with shape [768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_10/ff/LayerNorm/gamma/adam_m with shape [768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_10/ff/LayerNorm/gamma/adam_v with shape [768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_10/ff/layer_1/bias with shape [3072]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_10/ff/layer_1/bias/adam_m with shape [3072]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_10/ff/layer_1/bias/adam_v with shape [3072]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_10/ff/layer_1/kernel with shape [768, 3072]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_10/ff/layer_1/kernel/adam_m with shape [768, 3072]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_10/ff/layer_1/kernel/adam_v with shape [768, 3072]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_10/ff/layer_2/bias with shape [768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_10/ff/layer_2/bias/adam_m with shape [768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_10/ff/layer_2/bias/adam_v with shape [768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_10/ff/layer_2/kernel with shape [3072, 768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_10/ff/layer_2/kernel/adam_m with shape [3072, 768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_10/ff/layer_2/kernel/adam_v with shape [3072, 768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_10/rel_attn/LayerNorm/beta with shape [768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_10/rel_attn/LayerNorm/beta/adam_m with shape [768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_10/rel_attn/LayerNorm/beta/adam_v with shape [768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_10/rel_attn/LayerNorm/gamma with shape [768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_10/rel_attn/LayerNorm/gamma/adam_m with shape [768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_10/rel_attn/LayerNorm/gamma/adam_v with shape [768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_10/rel_attn/k/kernel with shape [768, 12, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_10/rel_attn/k/kernel/adam_m with shape [768, 12, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_10/rel_attn/k/kernel/adam_v with shape [768, 12, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_10/rel_attn/o/kernel with shape [768, 12, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_10/rel_attn/o/kernel/adam_m with shape [768, 12, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_10/rel_attn/o/kernel/adam_v with shape [768, 12, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_10/rel_attn/q/kernel with shape [768, 12, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_10/rel_attn/q/kernel/adam_m with shape [768, 12, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_10/rel_attn/q/kernel/adam_v with shape [768, 12, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_10/rel_attn/r/kernel with shape [768, 12, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_10/rel_attn/r/kernel/adam_m with shape [768, 12, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_10/rel_attn/r/kernel/adam_v with shape [768, 12, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_10/rel_attn/v/kernel with shape [768, 12, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_10/rel_attn/v/kernel/adam_m with shape [768, 12, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_10/rel_attn/v/kernel/adam_v with shape [768, 12, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_11/ff/LayerNorm/beta with shape [768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_11/ff/LayerNorm/beta/adam_m with shape [768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_11/ff/LayerNorm/beta/adam_v with shape [768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_11/ff/LayerNorm/gamma with shape [768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_11/ff/LayerNorm/gamma/adam_m with shape [768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_11/ff/LayerNorm/gamma/adam_v with shape [768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_11/ff/layer_1/bias with shape [3072]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_11/ff/layer_1/bias/adam_m with shape [3072]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_11/ff/layer_1/bias/adam_v with shape [3072]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_11/ff/layer_1/kernel with shape [768, 3072]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_11/ff/layer_1/kernel/adam_m with shape [768, 3072]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_11/ff/layer_1/kernel/adam_v with shape [768, 3072]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_11/ff/layer_2/bias with shape [768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_11/ff/layer_2/bias/adam_m with shape [768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_11/ff/layer_2/bias/adam_v with shape [768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_11/ff/layer_2/kernel with shape [3072, 768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_11/ff/layer_2/kernel/adam_m with shape [3072, 768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_11/ff/layer_2/kernel/adam_v with shape [3072, 768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_11/rel_attn/LayerNorm/beta with shape [768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_11/rel_attn/LayerNorm/beta/adam_m with shape [768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_11/rel_attn/LayerNorm/beta/adam_v with shape [768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_11/rel_attn/LayerNorm/gamma with shape [768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_11/rel_attn/LayerNorm/gamma/adam_m with shape [768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_11/rel_attn/LayerNorm/gamma/adam_v with shape [768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_11/rel_attn/k/kernel with shape [768, 12, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_11/rel_attn/k/kernel/adam_m with shape [768, 12, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_11/rel_attn/k/kernel/adam_v with shape [768, 12, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_11/rel_attn/o/kernel with shape [768, 12, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_11/rel_attn/o/kernel/adam_m with shape [768, 12, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_11/rel_attn/o/kernel/adam_v with shape [768, 12, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_11/rel_attn/q/kernel with shape [768, 12, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_11/rel_attn/q/kernel/adam_m with shape [768, 12, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_11/rel_attn/q/kernel/adam_v with shape [768, 12, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_11/rel_attn/r/kernel with shape [768, 12, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_11/rel_attn/r/kernel/adam_m with shape [768, 12, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_11/rel_attn/r/kernel/adam_v with shape [768, 12, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_11/rel_attn/v/kernel with shape [768, 12, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_11/rel_attn/v/kernel/adam_m with shape [768, 12, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_11/rel_attn/v/kernel/adam_v with shape [768, 12, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_2/ff/LayerNorm/beta with shape [768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_2/ff/LayerNorm/beta/adam_m with shape [768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_2/ff/LayerNorm/beta/adam_v with shape [768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_2/ff/LayerNorm/gamma with shape [768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_2/ff/LayerNorm/gamma/adam_m with shape [768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_2/ff/LayerNorm/gamma/adam_v with shape [768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_2/ff/layer_1/bias with shape [3072]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_2/ff/layer_1/bias/adam_m with shape [3072]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_2/ff/layer_1/bias/adam_v with shape [3072]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_2/ff/layer_1/kernel with shape [768, 3072]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_2/ff/layer_1/kernel/adam_m with shape [768, 3072]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_2/ff/layer_1/kernel/adam_v with shape [768, 3072]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_2/ff/layer_2/bias with shape [768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_2/ff/layer_2/bias/adam_m with shape [768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_2/ff/layer_2/bias/adam_v with shape [768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_2/ff/layer_2/kernel with shape [3072, 768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_2/ff/layer_2/kernel/adam_m with shape [3072, 768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_2/ff/layer_2/kernel/adam_v with shape [3072, 768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_2/rel_attn/LayerNorm/beta with shape [768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_2/rel_attn/LayerNorm/beta/adam_m with shape [768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_2/rel_attn/LayerNorm/beta/adam_v with shape [768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_2/rel_attn/LayerNorm/gamma with shape [768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_2/rel_attn/LayerNorm/gamma/adam_m with shape [768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_2/rel_attn/LayerNorm/gamma/adam_v with shape [768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_2/rel_attn/k/kernel with shape [768, 12, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_2/rel_attn/k/kernel/adam_m with shape [768, 12, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_2/rel_attn/k/kernel/adam_v with shape [768, 12, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_2/rel_attn/o/kernel with shape [768, 12, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_2/rel_attn/o/kernel/adam_m with shape [768, 12, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_2/rel_attn/o/kernel/adam_v with shape [768, 12, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_2/rel_attn/q/kernel with shape [768, 12, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_2/rel_attn/q/kernel/adam_m with shape [768, 12, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_2/rel_attn/q/kernel/adam_v with shape [768, 12, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_2/rel_attn/r/kernel with shape [768, 12, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_2/rel_attn/r/kernel/adam_m with shape [768, 12, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_2/rel_attn/r/kernel/adam_v with shape [768, 12, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_2/rel_attn/v/kernel with shape [768, 12, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_2/rel_attn/v/kernel/adam_m with shape [768, 12, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_2/rel_attn/v/kernel/adam_v with shape [768, 12, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_3/ff/LayerNorm/beta with shape [768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_3/ff/LayerNorm/beta/adam_m with shape [768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_3/ff/LayerNorm/beta/adam_v with shape [768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_3/ff/LayerNorm/gamma with shape [768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_3/ff/LayerNorm/gamma/adam_m with shape [768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_3/ff/LayerNorm/gamma/adam_v with shape [768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_3/ff/layer_1/bias with shape [3072]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_3/ff/layer_1/bias/adam_m with shape [3072]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_3/ff/layer_1/bias/adam_v with shape [3072]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_3/ff/layer_1/kernel with shape [768, 3072]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_3/ff/layer_1/kernel/adam_m with shape [768, 3072]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_3/ff/layer_1/kernel/adam_v with shape [768, 3072]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_3/ff/layer_2/bias with shape [768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_3/ff/layer_2/bias/adam_m with shape [768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_3/ff/layer_2/bias/adam_v with shape [768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_3/ff/layer_2/kernel with shape [3072, 768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_3/ff/layer_2/kernel/adam_m with shape [3072, 768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_3/ff/layer_2/kernel/adam_v with shape [3072, 768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_3/rel_attn/LayerNorm/beta with shape [768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_3/rel_attn/LayerNorm/beta/adam_m with shape [768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_3/rel_attn/LayerNorm/beta/adam_v with shape [768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_3/rel_attn/LayerNorm/gamma with shape [768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_3/rel_attn/LayerNorm/gamma/adam_m with shape [768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_3/rel_attn/LayerNorm/gamma/adam_v with shape [768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_3/rel_attn/k/kernel with shape [768, 12, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_3/rel_attn/k/kernel/adam_m with shape [768, 12, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_3/rel_attn/k/kernel/adam_v with shape [768, 12, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_3/rel_attn/o/kernel with shape [768, 12, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_3/rel_attn/o/kernel/adam_m with shape [768, 12, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_3/rel_attn/o/kernel/adam_v with shape [768, 12, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_3/rel_attn/q/kernel with shape [768, 12, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_3/rel_attn/q/kernel/adam_m with shape [768, 12, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_3/rel_attn/q/kernel/adam_v with shape [768, 12, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_3/rel_attn/r/kernel with shape [768, 12, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_3/rel_attn/r/kernel/adam_m with shape [768, 12, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_3/rel_attn/r/kernel/adam_v with shape [768, 12, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_3/rel_attn/v/kernel with shape [768, 12, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_3/rel_attn/v/kernel/adam_m with shape [768, 12, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_3/rel_attn/v/kernel/adam_v with shape [768, 12, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_4/ff/LayerNorm/beta with shape [768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_4/ff/LayerNorm/beta/adam_m with shape [768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_4/ff/LayerNorm/beta/adam_v with shape [768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_4/ff/LayerNorm/gamma with shape [768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_4/ff/LayerNorm/gamma/adam_m with shape [768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_4/ff/LayerNorm/gamma/adam_v with shape [768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_4/ff/layer_1/bias with shape [3072]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_4/ff/layer_1/bias/adam_m with shape [3072]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_4/ff/layer_1/bias/adam_v with shape [3072]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_4/ff/layer_1/kernel with shape [768, 3072]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_4/ff/layer_1/kernel/adam_m with shape [768, 3072]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_4/ff/layer_1/kernel/adam_v with shape [768, 3072]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_4/ff/layer_2/bias with shape [768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_4/ff/layer_2/bias/adam_m with shape [768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_4/ff/layer_2/bias/adam_v with shape [768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_4/ff/layer_2/kernel with shape [3072, 768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_4/ff/layer_2/kernel/adam_m with shape [3072, 768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_4/ff/layer_2/kernel/adam_v with shape [3072, 768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_4/rel_attn/LayerNorm/beta with shape [768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_4/rel_attn/LayerNorm/beta/adam_m with shape [768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_4/rel_attn/LayerNorm/beta/adam_v with shape [768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_4/rel_attn/LayerNorm/gamma with shape [768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_4/rel_attn/LayerNorm/gamma/adam_m with shape [768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_4/rel_attn/LayerNorm/gamma/adam_v with shape [768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_4/rel_attn/k/kernel with shape [768, 12, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_4/rel_attn/k/kernel/adam_m with shape [768, 12, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_4/rel_attn/k/kernel/adam_v with shape [768, 12, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_4/rel_attn/o/kernel with shape [768, 12, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_4/rel_attn/o/kernel/adam_m with shape [768, 12, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_4/rel_attn/o/kernel/adam_v with shape [768, 12, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_4/rel_attn/q/kernel with shape [768, 12, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_4/rel_attn/q/kernel/adam_m with shape [768, 12, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_4/rel_attn/q/kernel/adam_v with shape [768, 12, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_4/rel_attn/r/kernel with shape [768, 12, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_4/rel_attn/r/kernel/adam_m with shape [768, 12, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_4/rel_attn/r/kernel/adam_v with shape [768, 12, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_4/rel_attn/v/kernel with shape [768, 12, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_4/rel_attn/v/kernel/adam_m with shape [768, 12, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_4/rel_attn/v/kernel/adam_v with shape [768, 12, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_5/ff/LayerNorm/beta with shape [768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_5/ff/LayerNorm/beta/adam_m with shape [768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_5/ff/LayerNorm/beta/adam_v with shape [768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_5/ff/LayerNorm/gamma with shape [768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_5/ff/LayerNorm/gamma/adam_m with shape [768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_5/ff/LayerNorm/gamma/adam_v with shape [768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_5/ff/layer_1/bias with shape [3072]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_5/ff/layer_1/bias/adam_m with shape [3072]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_5/ff/layer_1/bias/adam_v with shape [3072]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_5/ff/layer_1/kernel with shape [768, 3072]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_5/ff/layer_1/kernel/adam_m with shape [768, 3072]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_5/ff/layer_1/kernel/adam_v with shape [768, 3072]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_5/ff/layer_2/bias with shape [768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_5/ff/layer_2/bias/adam_m with shape [768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_5/ff/layer_2/bias/adam_v with shape [768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_5/ff/layer_2/kernel with shape [3072, 768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_5/ff/layer_2/kernel/adam_m with shape [3072, 768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_5/ff/layer_2/kernel/adam_v with shape [3072, 768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_5/rel_attn/LayerNorm/beta with shape [768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_5/rel_attn/LayerNorm/beta/adam_m with shape [768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_5/rel_attn/LayerNorm/beta/adam_v with shape [768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_5/rel_attn/LayerNorm/gamma with shape [768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_5/rel_attn/LayerNorm/gamma/adam_m with shape [768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_5/rel_attn/LayerNorm/gamma/adam_v with shape [768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_5/rel_attn/k/kernel with shape [768, 12, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_5/rel_attn/k/kernel/adam_m with shape [768, 12, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_5/rel_attn/k/kernel/adam_v with shape [768, 12, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_5/rel_attn/o/kernel with shape [768, 12, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_5/rel_attn/o/kernel/adam_m with shape [768, 12, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_5/rel_attn/o/kernel/adam_v with shape [768, 12, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_5/rel_attn/q/kernel with shape [768, 12, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_5/rel_attn/q/kernel/adam_m with shape [768, 12, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_5/rel_attn/q/kernel/adam_v with shape [768, 12, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_5/rel_attn/r/kernel with shape [768, 12, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_5/rel_attn/r/kernel/adam_m with shape [768, 12, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_5/rel_attn/r/kernel/adam_v with shape [768, 12, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_5/rel_attn/v/kernel with shape [768, 12, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_5/rel_attn/v/kernel/adam_m with shape [768, 12, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_5/rel_attn/v/kernel/adam_v with shape [768, 12, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_6/ff/LayerNorm/beta with shape [768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_6/ff/LayerNorm/beta/adam_m with shape [768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_6/ff/LayerNorm/beta/adam_v with shape [768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_6/ff/LayerNorm/gamma with shape [768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_6/ff/LayerNorm/gamma/adam_m with shape [768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_6/ff/LayerNorm/gamma/adam_v with shape [768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_6/ff/layer_1/bias with shape [3072]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_6/ff/layer_1/bias/adam_m with shape [3072]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_6/ff/layer_1/bias/adam_v with shape [3072]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_6/ff/layer_1/kernel with shape [768, 3072]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_6/ff/layer_1/kernel/adam_m with shape [768, 3072]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_6/ff/layer_1/kernel/adam_v with shape [768, 3072]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_6/ff/layer_2/bias with shape [768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_6/ff/layer_2/bias/adam_m with shape [768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_6/ff/layer_2/bias/adam_v with shape [768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_6/ff/layer_2/kernel with shape [3072, 768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_6/ff/layer_2/kernel/adam_m with shape [3072, 768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_6/ff/layer_2/kernel/adam_v with shape [3072, 768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_6/rel_attn/LayerNorm/beta with shape [768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_6/rel_attn/LayerNorm/beta/adam_m with shape [768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_6/rel_attn/LayerNorm/beta/adam_v with shape [768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_6/rel_attn/LayerNorm/gamma with shape [768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_6/rel_attn/LayerNorm/gamma/adam_m with shape [768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_6/rel_attn/LayerNorm/gamma/adam_v with shape [768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_6/rel_attn/k/kernel with shape [768, 12, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_6/rel_attn/k/kernel/adam_m with shape [768, 12, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_6/rel_attn/k/kernel/adam_v with shape [768, 12, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_6/rel_attn/o/kernel with shape [768, 12, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_6/rel_attn/o/kernel/adam_m with shape [768, 12, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_6/rel_attn/o/kernel/adam_v with shape [768, 12, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_6/rel_attn/q/kernel with shape [768, 12, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_6/rel_attn/q/kernel/adam_m with shape [768, 12, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_6/rel_attn/q/kernel/adam_v with shape [768, 12, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_6/rel_attn/r/kernel with shape [768, 12, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_6/rel_attn/r/kernel/adam_m with shape [768, 12, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_6/rel_attn/r/kernel/adam_v with shape [768, 12, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_6/rel_attn/v/kernel with shape [768, 12, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_6/rel_attn/v/kernel/adam_m with shape [768, 12, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_6/rel_attn/v/kernel/adam_v with shape [768, 12, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_7/ff/LayerNorm/beta with shape [768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_7/ff/LayerNorm/beta/adam_m with shape [768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_7/ff/LayerNorm/beta/adam_v with shape [768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_7/ff/LayerNorm/gamma with shape [768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_7/ff/LayerNorm/gamma/adam_m with shape [768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_7/ff/LayerNorm/gamma/adam_v with shape [768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_7/ff/layer_1/bias with shape [3072]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_7/ff/layer_1/bias/adam_m with shape [3072]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_7/ff/layer_1/bias/adam_v with shape [3072]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_7/ff/layer_1/kernel with shape [768, 3072]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_7/ff/layer_1/kernel/adam_m with shape [768, 3072]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_7/ff/layer_1/kernel/adam_v with shape [768, 3072]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_7/ff/layer_2/bias with shape [768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_7/ff/layer_2/bias/adam_m with shape [768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_7/ff/layer_2/bias/adam_v with shape [768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_7/ff/layer_2/kernel with shape [3072, 768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_7/ff/layer_2/kernel/adam_m with shape [3072, 768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_7/ff/layer_2/kernel/adam_v with shape [3072, 768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_7/rel_attn/LayerNorm/beta with shape [768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_7/rel_attn/LayerNorm/beta/adam_m with shape [768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_7/rel_attn/LayerNorm/beta/adam_v with shape [768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_7/rel_attn/LayerNorm/gamma with shape [768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_7/rel_attn/LayerNorm/gamma/adam_m with shape [768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_7/rel_attn/LayerNorm/gamma/adam_v with shape [768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_7/rel_attn/k/kernel with shape [768, 12, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_7/rel_attn/k/kernel/adam_m with shape [768, 12, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_7/rel_attn/k/kernel/adam_v with shape [768, 12, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_7/rel_attn/o/kernel with shape [768, 12, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_7/rel_attn/o/kernel/adam_m with shape [768, 12, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_7/rel_attn/o/kernel/adam_v with shape [768, 12, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_7/rel_attn/q/kernel with shape [768, 12, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_7/rel_attn/q/kernel/adam_m with shape [768, 12, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_7/rel_attn/q/kernel/adam_v with shape [768, 12, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_7/rel_attn/r/kernel with shape [768, 12, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_7/rel_attn/r/kernel/adam_m with shape [768, 12, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_7/rel_attn/r/kernel/adam_v with shape [768, 12, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_7/rel_attn/v/kernel with shape [768, 12, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_7/rel_attn/v/kernel/adam_m with shape [768, 12, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_7/rel_attn/v/kernel/adam_v with shape [768, 12, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_8/ff/LayerNorm/beta with shape [768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_8/ff/LayerNorm/beta/adam_m with shape [768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_8/ff/LayerNorm/beta/adam_v with shape [768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_8/ff/LayerNorm/gamma with shape [768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_8/ff/LayerNorm/gamma/adam_m with shape [768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_8/ff/LayerNorm/gamma/adam_v with shape [768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_8/ff/layer_1/bias with shape [3072]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_8/ff/layer_1/bias/adam_m with shape [3072]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_8/ff/layer_1/bias/adam_v with shape [3072]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_8/ff/layer_1/kernel with shape [768, 3072]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_8/ff/layer_1/kernel/adam_m with shape [768, 3072]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_8/ff/layer_1/kernel/adam_v with shape [768, 3072]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_8/ff/layer_2/bias with shape [768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_8/ff/layer_2/bias/adam_m with shape [768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_8/ff/layer_2/bias/adam_v with shape [768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_8/ff/layer_2/kernel with shape [3072, 768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_8/ff/layer_2/kernel/adam_m with shape [3072, 768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_8/ff/layer_2/kernel/adam_v with shape [3072, 768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_8/rel_attn/LayerNorm/beta with shape [768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_8/rel_attn/LayerNorm/beta/adam_m with shape [768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_8/rel_attn/LayerNorm/beta/adam_v with shape [768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_8/rel_attn/LayerNorm/gamma with shape [768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_8/rel_attn/LayerNorm/gamma/adam_m with shape [768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_8/rel_attn/LayerNorm/gamma/adam_v with shape [768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_8/rel_attn/k/kernel with shape [768, 12, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_8/rel_attn/k/kernel/adam_m with shape [768, 12, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_8/rel_attn/k/kernel/adam_v with shape [768, 12, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_8/rel_attn/o/kernel with shape [768, 12, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_8/rel_attn/o/kernel/adam_m with shape [768, 12, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_8/rel_attn/o/kernel/adam_v with shape [768, 12, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_8/rel_attn/q/kernel with shape [768, 12, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_8/rel_attn/q/kernel/adam_m with shape [768, 12, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_8/rel_attn/q/kernel/adam_v with shape [768, 12, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_8/rel_attn/r/kernel with shape [768, 12, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_8/rel_attn/r/kernel/adam_m with shape [768, 12, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_8/rel_attn/r/kernel/adam_v with shape [768, 12, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_8/rel_attn/v/kernel with shape [768, 12, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_8/rel_attn/v/kernel/adam_m with shape [768, 12, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_8/rel_attn/v/kernel/adam_v with shape [768, 12, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_9/ff/LayerNorm/beta with shape [768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_9/ff/LayerNorm/beta/adam_m with shape [768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_9/ff/LayerNorm/beta/adam_v with shape [768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_9/ff/LayerNorm/gamma with shape [768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_9/ff/LayerNorm/gamma/adam_m with shape [768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_9/ff/LayerNorm/gamma/adam_v with shape [768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_9/ff/layer_1/bias with shape [3072]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_9/ff/layer_1/bias/adam_m with shape [3072]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_9/ff/layer_1/bias/adam_v with shape [3072]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_9/ff/layer_1/kernel with shape [768, 3072]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_9/ff/layer_1/kernel/adam_m with shape [768, 3072]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_9/ff/layer_1/kernel/adam_v with shape [768, 3072]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_9/ff/layer_2/bias with shape [768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_9/ff/layer_2/bias/adam_m with shape [768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_9/ff/layer_2/bias/adam_v with shape [768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_9/ff/layer_2/kernel with shape [3072, 768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_9/ff/layer_2/kernel/adam_m with shape [3072, 768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_9/ff/layer_2/kernel/adam_v with shape [3072, 768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_9/rel_attn/LayerNorm/beta with shape [768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_9/rel_attn/LayerNorm/beta/adam_m with shape [768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_9/rel_attn/LayerNorm/beta/adam_v with shape [768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_9/rel_attn/LayerNorm/gamma with shape [768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_9/rel_attn/LayerNorm/gamma/adam_m with shape [768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_9/rel_attn/LayerNorm/gamma/adam_v with shape [768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_9/rel_attn/k/kernel with shape [768, 12, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_9/rel_attn/k/kernel/adam_m with shape [768, 12, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_9/rel_attn/k/kernel/adam_v with shape [768, 12, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_9/rel_attn/o/kernel with shape [768, 12, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_9/rel_attn/o/kernel/adam_m with shape [768, 12, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_9/rel_attn/o/kernel/adam_v with shape [768, 12, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_9/rel_attn/q/kernel with shape [768, 12, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_9/rel_attn/q/kernel/adam_m with shape [768, 12, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_9/rel_attn/q/kernel/adam_v with shape [768, 12, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_9/rel_attn/r/kernel with shape [768, 12, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_9/rel_attn/r/kernel/adam_m with shape [768, 12, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_9/rel_attn/r/kernel/adam_v with shape [768, 12, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_9/rel_attn/v/kernel with shape [768, 12, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_9/rel_attn/v/kernel/adam_m with shape [768, 12, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_9/rel_attn/v/kernel/adam_v with shape [768, 12, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/mask_emb/mask_emb with shape [1, 1, 768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/mask_emb/mask_emb/adam_m with shape [1, 1, 768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/mask_emb/mask_emb/adam_v with shape [1, 1, 768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/r_r_bias with shape [12, 12, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/r_r_bias/adam_m with shape [12, 12, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/r_r_bias/adam_v with shape [12, 12, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/r_s_bias with shape [12, 12, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/r_s_bias/adam_m with shape [12, 12, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/r_s_bias/adam_v with shape [12, 12, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/r_w_bias with shape [12, 12, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/r_w_bias/adam_m with shape [12, 12, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/r_w_bias/adam_v with shape [12, 12, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/seg_embed with shape [12, 2, 12, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/seg_embed/adam_m with shape [12, 2, 12, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/seg_embed/adam_v with shape [12, 2, 12, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/word_embedding/lookup_table with shape [32000, 768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/word_embedding/lookup_table/adam_m with shape [32000, 768]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/word_embedding/lookup_table/adam_v with shape [32000, 768]\n",
      "INFO:transformers.modeling_xlnet:Importing model/lm_loss/bias\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/lm_loss/bias\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/word_embedding/lookup_table\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/word_embedding/lookup_table\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/mask_emb/mask_emb\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/mask_emb/mask_emb\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_0/rel_attn/LayerNorm/gamma\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_0/rel_attn/LayerNorm/gamma\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_0/rel_attn/LayerNorm/beta\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_0/rel_attn/LayerNorm/beta\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_0/rel_attn/o/kernel\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_0/rel_attn/o/kernel\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_0/rel_attn/q/kernel\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_0/rel_attn/q/kernel\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_0/rel_attn/k/kernel\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_0/rel_attn/k/kernel\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_0/rel_attn/r/kernel\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_0/rel_attn/r/kernel\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_0/rel_attn/v/kernel\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_0/rel_attn/v/kernel\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_0/ff/LayerNorm/gamma\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_0/ff/LayerNorm/gamma\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_0/ff/LayerNorm/beta\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_0/ff/LayerNorm/beta\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_0/ff/layer_1/kernel\n",
      "INFO:transformers.modeling_xlnet:Transposing\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_0/ff/layer_1/kernel\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_0/ff/layer_1/bias\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_0/ff/layer_1/bias\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_0/ff/layer_2/kernel\n",
      "INFO:transformers.modeling_xlnet:Transposing\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_0/ff/layer_2/kernel\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_0/ff/layer_2/bias\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_0/ff/layer_2/bias\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_1/rel_attn/LayerNorm/gamma\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_1/rel_attn/LayerNorm/gamma\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_1/rel_attn/LayerNorm/beta\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_1/rel_attn/LayerNorm/beta\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_1/rel_attn/o/kernel\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_1/rel_attn/o/kernel\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_1/rel_attn/q/kernel\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_1/rel_attn/q/kernel\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_1/rel_attn/k/kernel\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_1/rel_attn/k/kernel\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_1/rel_attn/r/kernel\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_1/rel_attn/r/kernel\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_1/rel_attn/v/kernel\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_1/rel_attn/v/kernel\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_1/ff/LayerNorm/gamma\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_1/ff/LayerNorm/gamma\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_1/ff/LayerNorm/beta\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_1/ff/LayerNorm/beta\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_1/ff/layer_1/kernel\n",
      "INFO:transformers.modeling_xlnet:Transposing\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_1/ff/layer_1/kernel\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_1/ff/layer_1/bias\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_1/ff/layer_1/bias\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_1/ff/layer_2/kernel\n",
      "INFO:transformers.modeling_xlnet:Transposing\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_1/ff/layer_2/kernel\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_1/ff/layer_2/bias\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_1/ff/layer_2/bias\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_2/rel_attn/LayerNorm/gamma\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_2/rel_attn/LayerNorm/gamma\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_2/rel_attn/LayerNorm/beta\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_2/rel_attn/LayerNorm/beta\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_2/rel_attn/o/kernel\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_2/rel_attn/o/kernel\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_2/rel_attn/q/kernel\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_2/rel_attn/q/kernel\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_2/rel_attn/k/kernel\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_2/rel_attn/k/kernel\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_2/rel_attn/r/kernel\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_2/rel_attn/r/kernel\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_2/rel_attn/v/kernel\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_2/rel_attn/v/kernel\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_2/ff/LayerNorm/gamma\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_2/ff/LayerNorm/gamma\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_2/ff/LayerNorm/beta\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_2/ff/LayerNorm/beta\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_2/ff/layer_1/kernel\n",
      "INFO:transformers.modeling_xlnet:Transposing\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_2/ff/layer_1/kernel\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_2/ff/layer_1/bias\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_2/ff/layer_1/bias\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_2/ff/layer_2/kernel\n",
      "INFO:transformers.modeling_xlnet:Transposing\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_2/ff/layer_2/kernel\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_2/ff/layer_2/bias\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_2/ff/layer_2/bias\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_3/rel_attn/LayerNorm/gamma\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_3/rel_attn/LayerNorm/gamma\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_3/rel_attn/LayerNorm/beta\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_3/rel_attn/LayerNorm/beta\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_3/rel_attn/o/kernel\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_3/rel_attn/o/kernel\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_3/rel_attn/q/kernel\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_3/rel_attn/q/kernel\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_3/rel_attn/k/kernel\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_3/rel_attn/k/kernel\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_3/rel_attn/r/kernel\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_3/rel_attn/r/kernel\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_3/rel_attn/v/kernel\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_3/rel_attn/v/kernel\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_3/ff/LayerNorm/gamma\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_3/ff/LayerNorm/gamma\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_3/ff/LayerNorm/beta\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_3/ff/LayerNorm/beta\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_3/ff/layer_1/kernel\n",
      "INFO:transformers.modeling_xlnet:Transposing\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_3/ff/layer_1/kernel\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_3/ff/layer_1/bias\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_3/ff/layer_1/bias\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_3/ff/layer_2/kernel\n",
      "INFO:transformers.modeling_xlnet:Transposing\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_3/ff/layer_2/kernel\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_3/ff/layer_2/bias\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_3/ff/layer_2/bias\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_4/rel_attn/LayerNorm/gamma\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_4/rel_attn/LayerNorm/gamma\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_4/rel_attn/LayerNorm/beta\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_4/rel_attn/LayerNorm/beta\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_4/rel_attn/o/kernel\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_4/rel_attn/o/kernel\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_4/rel_attn/q/kernel\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_4/rel_attn/q/kernel\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_4/rel_attn/k/kernel\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_4/rel_attn/k/kernel\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_4/rel_attn/r/kernel\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_4/rel_attn/r/kernel\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_4/rel_attn/v/kernel\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_4/rel_attn/v/kernel\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_4/ff/LayerNorm/gamma\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_4/ff/LayerNorm/gamma\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_4/ff/LayerNorm/beta\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_4/ff/LayerNorm/beta\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_4/ff/layer_1/kernel\n",
      "INFO:transformers.modeling_xlnet:Transposing\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_4/ff/layer_1/kernel\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_4/ff/layer_1/bias\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_4/ff/layer_1/bias\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_4/ff/layer_2/kernel\n",
      "INFO:transformers.modeling_xlnet:Transposing\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_4/ff/layer_2/kernel\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_4/ff/layer_2/bias\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_4/ff/layer_2/bias\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_5/rel_attn/LayerNorm/gamma\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_5/rel_attn/LayerNorm/gamma\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_5/rel_attn/LayerNorm/beta\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_5/rel_attn/LayerNorm/beta\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_5/rel_attn/o/kernel\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_5/rel_attn/o/kernel\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_5/rel_attn/q/kernel\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_5/rel_attn/q/kernel\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_5/rel_attn/k/kernel\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_5/rel_attn/k/kernel\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_5/rel_attn/r/kernel\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_5/rel_attn/r/kernel\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_5/rel_attn/v/kernel\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_5/rel_attn/v/kernel\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_5/ff/LayerNorm/gamma\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_5/ff/LayerNorm/gamma\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_5/ff/LayerNorm/beta\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_5/ff/LayerNorm/beta\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_5/ff/layer_1/kernel\n",
      "INFO:transformers.modeling_xlnet:Transposing\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_5/ff/layer_1/kernel\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_5/ff/layer_1/bias\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_5/ff/layer_1/bias\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_5/ff/layer_2/kernel\n",
      "INFO:transformers.modeling_xlnet:Transposing\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_5/ff/layer_2/kernel\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_5/ff/layer_2/bias\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_5/ff/layer_2/bias\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_6/rel_attn/LayerNorm/gamma\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_6/rel_attn/LayerNorm/gamma\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_6/rel_attn/LayerNorm/beta\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_6/rel_attn/LayerNorm/beta\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_6/rel_attn/o/kernel\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_6/rel_attn/o/kernel\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_6/rel_attn/q/kernel\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_6/rel_attn/q/kernel\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_6/rel_attn/k/kernel\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_6/rel_attn/k/kernel\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_6/rel_attn/r/kernel\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_6/rel_attn/r/kernel\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_6/rel_attn/v/kernel\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_6/rel_attn/v/kernel\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_6/ff/LayerNorm/gamma\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_6/ff/LayerNorm/gamma\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_6/ff/LayerNorm/beta\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_6/ff/LayerNorm/beta\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_6/ff/layer_1/kernel\n",
      "INFO:transformers.modeling_xlnet:Transposing\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_6/ff/layer_1/kernel\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_6/ff/layer_1/bias\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_6/ff/layer_1/bias\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_6/ff/layer_2/kernel\n",
      "INFO:transformers.modeling_xlnet:Transposing\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_6/ff/layer_2/kernel\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_6/ff/layer_2/bias\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_6/ff/layer_2/bias\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_7/rel_attn/LayerNorm/gamma\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_7/rel_attn/LayerNorm/gamma\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_7/rel_attn/LayerNorm/beta\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_7/rel_attn/LayerNorm/beta\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_7/rel_attn/o/kernel\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_7/rel_attn/o/kernel\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_7/rel_attn/q/kernel\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_7/rel_attn/q/kernel\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_7/rel_attn/k/kernel\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_7/rel_attn/k/kernel\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_7/rel_attn/r/kernel\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_7/rel_attn/r/kernel\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_7/rel_attn/v/kernel\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_7/rel_attn/v/kernel\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_7/ff/LayerNorm/gamma\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_7/ff/LayerNorm/gamma\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_7/ff/LayerNorm/beta\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_7/ff/LayerNorm/beta\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_7/ff/layer_1/kernel\n",
      "INFO:transformers.modeling_xlnet:Transposing\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_7/ff/layer_1/kernel\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_7/ff/layer_1/bias\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_7/ff/layer_1/bias\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_7/ff/layer_2/kernel\n",
      "INFO:transformers.modeling_xlnet:Transposing\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_7/ff/layer_2/kernel\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_7/ff/layer_2/bias\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_7/ff/layer_2/bias\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_8/rel_attn/LayerNorm/gamma\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_8/rel_attn/LayerNorm/gamma\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_8/rel_attn/LayerNorm/beta\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_8/rel_attn/LayerNorm/beta\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_8/rel_attn/o/kernel\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_8/rel_attn/o/kernel\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_8/rel_attn/q/kernel\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_8/rel_attn/q/kernel\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_8/rel_attn/k/kernel\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_8/rel_attn/k/kernel\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_8/rel_attn/r/kernel\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_8/rel_attn/r/kernel\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_8/rel_attn/v/kernel\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_8/rel_attn/v/kernel\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_8/ff/LayerNorm/gamma\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_8/ff/LayerNorm/gamma\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_8/ff/LayerNorm/beta\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_8/ff/LayerNorm/beta\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_8/ff/layer_1/kernel\n",
      "INFO:transformers.modeling_xlnet:Transposing\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_8/ff/layer_1/kernel\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_8/ff/layer_1/bias\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_8/ff/layer_1/bias\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_8/ff/layer_2/kernel\n",
      "INFO:transformers.modeling_xlnet:Transposing\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_8/ff/layer_2/kernel\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_8/ff/layer_2/bias\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_8/ff/layer_2/bias\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_9/rel_attn/LayerNorm/gamma\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_9/rel_attn/LayerNorm/gamma\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_9/rel_attn/LayerNorm/beta\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_9/rel_attn/LayerNorm/beta\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_9/rel_attn/o/kernel\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_9/rel_attn/o/kernel\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_9/rel_attn/q/kernel\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_9/rel_attn/q/kernel\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_9/rel_attn/k/kernel\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_9/rel_attn/k/kernel\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_9/rel_attn/r/kernel\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_9/rel_attn/r/kernel\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_9/rel_attn/v/kernel\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_9/rel_attn/v/kernel\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_9/ff/LayerNorm/gamma\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_9/ff/LayerNorm/gamma\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_9/ff/LayerNorm/beta\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_9/ff/LayerNorm/beta\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_9/ff/layer_1/kernel\n",
      "INFO:transformers.modeling_xlnet:Transposing\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_9/ff/layer_1/kernel\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_9/ff/layer_1/bias\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_9/ff/layer_1/bias\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_9/ff/layer_2/kernel\n",
      "INFO:transformers.modeling_xlnet:Transposing\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_9/ff/layer_2/kernel\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_9/ff/layer_2/bias\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_9/ff/layer_2/bias\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_10/rel_attn/LayerNorm/gamma\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_10/rel_attn/LayerNorm/gamma\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_10/rel_attn/LayerNorm/beta\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_10/rel_attn/LayerNorm/beta\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_10/rel_attn/o/kernel\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_10/rel_attn/o/kernel\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_10/rel_attn/q/kernel\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_10/rel_attn/q/kernel\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_10/rel_attn/k/kernel\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_10/rel_attn/k/kernel\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_10/rel_attn/r/kernel\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_10/rel_attn/r/kernel\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_10/rel_attn/v/kernel\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_10/rel_attn/v/kernel\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_10/ff/LayerNorm/gamma\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_10/ff/LayerNorm/gamma\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_10/ff/LayerNorm/beta\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_10/ff/LayerNorm/beta\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_10/ff/layer_1/kernel\n",
      "INFO:transformers.modeling_xlnet:Transposing\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_10/ff/layer_1/kernel\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_10/ff/layer_1/bias\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_10/ff/layer_1/bias\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_10/ff/layer_2/kernel\n",
      "INFO:transformers.modeling_xlnet:Transposing\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_10/ff/layer_2/kernel\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_10/ff/layer_2/bias\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_10/ff/layer_2/bias\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_11/rel_attn/LayerNorm/gamma\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_11/rel_attn/LayerNorm/gamma\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_11/rel_attn/LayerNorm/beta\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_11/rel_attn/LayerNorm/beta\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_11/rel_attn/o/kernel\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_11/rel_attn/o/kernel\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_11/rel_attn/q/kernel\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_11/rel_attn/q/kernel\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_11/rel_attn/k/kernel\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_11/rel_attn/k/kernel\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_11/rel_attn/r/kernel\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_11/rel_attn/r/kernel\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_11/rel_attn/v/kernel\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_11/rel_attn/v/kernel\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_11/ff/LayerNorm/gamma\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_11/ff/LayerNorm/gamma\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_11/ff/LayerNorm/beta\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_11/ff/LayerNorm/beta\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_11/ff/layer_1/kernel\n",
      "INFO:transformers.modeling_xlnet:Transposing\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_11/ff/layer_1/kernel\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_11/ff/layer_1/bias\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_11/ff/layer_1/bias\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_11/ff/layer_2/kernel\n",
      "INFO:transformers.modeling_xlnet:Transposing\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_11/ff/layer_2/kernel\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_11/ff/layer_2/bias\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_11/ff/layer_2/bias\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/r_r_bias\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/r_r_bias for layer 0\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/r_r_bias for layer 1\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/r_r_bias for layer 2\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/r_r_bias for layer 3\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/r_r_bias for layer 4\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/r_r_bias for layer 5\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/r_r_bias for layer 6\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/r_r_bias for layer 7\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/r_r_bias for layer 8\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/r_r_bias for layer 9\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/r_r_bias for layer 10\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/r_r_bias for layer 11\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/r_w_bias\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/r_w_bias for layer 0\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/r_w_bias for layer 1\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/r_w_bias for layer 2\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/r_w_bias for layer 3\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/r_w_bias for layer 4\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/r_w_bias for layer 5\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/r_w_bias for layer 6\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/r_w_bias for layer 7\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/r_w_bias for layer 8\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/r_w_bias for layer 9\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/r_w_bias for layer 10\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/r_w_bias for layer 11\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/r_s_bias\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/r_s_bias for layer 0\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/r_s_bias for layer 1\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/r_s_bias for layer 2\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/r_s_bias for layer 3\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/r_s_bias for layer 4\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/r_s_bias for layer 5\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/r_s_bias for layer 6\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/r_s_bias for layer 7\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/r_s_bias for layer 8\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/r_s_bias for layer 9\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/r_s_bias for layer 10\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/r_s_bias for layer 11\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/seg_embed\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/seg_embed for layer 0\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/seg_embed for layer 1\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/seg_embed for layer 2\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/seg_embed for layer 3\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/seg_embed for layer 4\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/seg_embed for layer 5\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/seg_embed for layer 6\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/seg_embed for layer 7\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/seg_embed for layer 8\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/seg_embed for layer 9\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/seg_embed for layer 10\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/seg_embed for layer 11\n",
      "INFO:transformers.modeling_xlnet:Weights not copied to PyTorch model: global_step, model/lm_loss/bias/adam_m, model/lm_loss/bias/adam_v, model/transformer/layer_0/ff/LayerNorm/beta/adam_m, model/transformer/layer_0/ff/LayerNorm/beta/adam_v, model/transformer/layer_0/ff/LayerNorm/gamma/adam_m, model/transformer/layer_0/ff/LayerNorm/gamma/adam_v, model/transformer/layer_0/ff/layer_1/bias/adam_m, model/transformer/layer_0/ff/layer_1/bias/adam_v, model/transformer/layer_0/ff/layer_1/kernel/adam_m, model/transformer/layer_0/ff/layer_1/kernel/adam_v, model/transformer/layer_0/ff/layer_2/bias/adam_m, model/transformer/layer_0/ff/layer_2/bias/adam_v, model/transformer/layer_0/ff/layer_2/kernel/adam_m, model/transformer/layer_0/ff/layer_2/kernel/adam_v, model/transformer/layer_0/rel_attn/LayerNorm/beta/adam_m, model/transformer/layer_0/rel_attn/LayerNorm/beta/adam_v, model/transformer/layer_0/rel_attn/LayerNorm/gamma/adam_m, model/transformer/layer_0/rel_attn/LayerNorm/gamma/adam_v, model/transformer/layer_0/rel_attn/k/kernel/adam_m, model/transformer/layer_0/rel_attn/k/kernel/adam_v, model/transformer/layer_0/rel_attn/o/kernel/adam_m, model/transformer/layer_0/rel_attn/o/kernel/adam_v, model/transformer/layer_0/rel_attn/q/kernel/adam_m, model/transformer/layer_0/rel_attn/q/kernel/adam_v, model/transformer/layer_0/rel_attn/r/kernel/adam_m, model/transformer/layer_0/rel_attn/r/kernel/adam_v, model/transformer/layer_0/rel_attn/v/kernel/adam_m, model/transformer/layer_0/rel_attn/v/kernel/adam_v, model/transformer/layer_1/ff/LayerNorm/beta/adam_m, model/transformer/layer_1/ff/LayerNorm/beta/adam_v, model/transformer/layer_1/ff/LayerNorm/gamma/adam_m, model/transformer/layer_1/ff/LayerNorm/gamma/adam_v, model/transformer/layer_1/ff/layer_1/bias/adam_m, model/transformer/layer_1/ff/layer_1/bias/adam_v, model/transformer/layer_1/ff/layer_1/kernel/adam_m, model/transformer/layer_1/ff/layer_1/kernel/adam_v, model/transformer/layer_1/ff/layer_2/bias/adam_m, 
model/transformer/layer_1/ff/layer_2/bias/adam_v, model/transformer/layer_1/ff/layer_2/kernel/adam_m, model/transformer/layer_1/ff/layer_2/kernel/adam_v, model/transformer/layer_1/rel_attn/LayerNorm/beta/adam_m, model/transformer/layer_1/rel_attn/LayerNorm/beta/adam_v, model/transformer/layer_1/rel_attn/LayerNorm/gamma/adam_m, model/transformer/layer_1/rel_attn/LayerNorm/gamma/adam_v, model/transformer/layer_1/rel_attn/k/kernel/adam_m, model/transformer/layer_1/rel_attn/k/kernel/adam_v, model/transformer/layer_1/rel_attn/o/kernel/adam_m, model/transformer/layer_1/rel_attn/o/kernel/adam_v, model/transformer/layer_1/rel_attn/q/kernel/adam_m, model/transformer/layer_1/rel_attn/q/kernel/adam_v, model/transformer/layer_1/rel_attn/r/kernel/adam_m, model/transformer/layer_1/rel_attn/r/kernel/adam_v, model/transformer/layer_1/rel_attn/v/kernel/adam_m, model/transformer/layer_1/rel_attn/v/kernel/adam_v, model/transformer/layer_10/ff/LayerNorm/beta/adam_m, model/transformer/layer_10/ff/LayerNorm/beta/adam_v, model/transformer/layer_10/ff/LayerNorm/gamma/adam_m, model/transformer/layer_10/ff/LayerNorm/gamma/adam_v, model/transformer/layer_10/ff/layer_1/bias/adam_m, model/transformer/layer_10/ff/layer_1/bias/adam_v, model/transformer/layer_10/ff/layer_1/kernel/adam_m, model/transformer/layer_10/ff/layer_1/kernel/adam_v, model/transformer/layer_10/ff/layer_2/bias/adam_m, model/transformer/layer_10/ff/layer_2/bias/adam_v, model/transformer/layer_10/ff/layer_2/kernel/adam_m, model/transformer/layer_10/ff/layer_2/kernel/adam_v, model/transformer/layer_10/rel_attn/LayerNorm/beta/adam_m, model/transformer/layer_10/rel_attn/LayerNorm/beta/adam_v, model/transformer/layer_10/rel_attn/LayerNorm/gamma/adam_m, model/transformer/layer_10/rel_attn/LayerNorm/gamma/adam_v, model/transformer/layer_10/rel_attn/k/kernel/adam_m, model/transformer/layer_10/rel_attn/k/kernel/adam_v, model/transformer/layer_10/rel_attn/o/kernel/adam_m, model/transformer/layer_10/rel_attn/o/kernel/adam_v, 
model/transformer/layer_10/rel_attn/q/kernel/adam_m, model/transformer/layer_10/rel_attn/q/kernel/adam_v, model/transformer/layer_10/rel_attn/r/kernel/adam_m, model/transformer/layer_10/rel_attn/r/kernel/adam_v, model/transformer/layer_10/rel_attn/v/kernel/adam_m, model/transformer/layer_10/rel_attn/v/kernel/adam_v, model/transformer/layer_11/ff/LayerNorm/beta/adam_m, model/transformer/layer_11/ff/LayerNorm/beta/adam_v, model/transformer/layer_11/ff/LayerNorm/gamma/adam_m, model/transformer/layer_11/ff/LayerNorm/gamma/adam_v, model/transformer/layer_11/ff/layer_1/bias/adam_m, model/transformer/layer_11/ff/layer_1/bias/adam_v, model/transformer/layer_11/ff/layer_1/kernel/adam_m, model/transformer/layer_11/ff/layer_1/kernel/adam_v, model/transformer/layer_11/ff/layer_2/bias/adam_m, model/transformer/layer_11/ff/layer_2/bias/adam_v, model/transformer/layer_11/ff/layer_2/kernel/adam_m, model/transformer/layer_11/ff/layer_2/kernel/adam_v, model/transformer/layer_11/rel_attn/LayerNorm/beta/adam_m, model/transformer/layer_11/rel_attn/LayerNorm/beta/adam_v, model/transformer/layer_11/rel_attn/LayerNorm/gamma/adam_m, model/transformer/layer_11/rel_attn/LayerNorm/gamma/adam_v, model/transformer/layer_11/rel_attn/k/kernel/adam_m, model/transformer/layer_11/rel_attn/k/kernel/adam_v, model/transformer/layer_11/rel_attn/o/kernel/adam_m, model/transformer/layer_11/rel_attn/o/kernel/adam_v, model/transformer/layer_11/rel_attn/q/kernel/adam_m, model/transformer/layer_11/rel_attn/q/kernel/adam_v, model/transformer/layer_11/rel_attn/r/kernel/adam_m, model/transformer/layer_11/rel_attn/r/kernel/adam_v, model/transformer/layer_11/rel_attn/v/kernel/adam_m, model/transformer/layer_11/rel_attn/v/kernel/adam_v, model/transformer/layer_2/ff/LayerNorm/beta/adam_m, model/transformer/layer_2/ff/LayerNorm/beta/adam_v, model/transformer/layer_2/ff/LayerNorm/gamma/adam_m, model/transformer/layer_2/ff/LayerNorm/gamma/adam_v, model/transformer/layer_2/ff/layer_1/bias/adam_m, 
model/transformer/layer_2/ff/layer_1/bias/adam_v, model/transformer/layer_2/ff/layer_1/kernel/adam_m, model/transformer/layer_2/ff/layer_1/kernel/adam_v, model/transformer/layer_2/ff/layer_2/bias/adam_m, model/transformer/layer_2/ff/layer_2/bias/adam_v, model/transformer/layer_2/ff/layer_2/kernel/adam_m, model/transformer/layer_2/ff/layer_2/kernel/adam_v, model/transformer/layer_2/rel_attn/LayerNorm/beta/adam_m, model/transformer/layer_2/rel_attn/LayerNorm/beta/adam_v, model/transformer/layer_2/rel_attn/LayerNorm/gamma/adam_m, model/transformer/layer_2/rel_attn/LayerNorm/gamma/adam_v, model/transformer/layer_2/rel_attn/k/kernel/adam_m, model/transformer/layer_2/rel_attn/k/kernel/adam_v, model/transformer/layer_2/rel_attn/o/kernel/adam_m, model/transformer/layer_2/rel_attn/o/kernel/adam_v, model/transformer/layer_2/rel_attn/q/kernel/adam_m, model/transformer/layer_2/rel_attn/q/kernel/adam_v, model/transformer/layer_2/rel_attn/r/kernel/adam_m, model/transformer/layer_2/rel_attn/r/kernel/adam_v, model/transformer/layer_2/rel_attn/v/kernel/adam_m, model/transformer/layer_2/rel_attn/v/kernel/adam_v, model/transformer/layer_3/ff/LayerNorm/beta/adam_m, model/transformer/layer_3/ff/LayerNorm/beta/adam_v, model/transformer/layer_3/ff/LayerNorm/gamma/adam_m, model/transformer/layer_3/ff/LayerNorm/gamma/adam_v, model/transformer/layer_3/ff/layer_1/bias/adam_m, model/transformer/layer_3/ff/layer_1/bias/adam_v, model/transformer/layer_3/ff/layer_1/kernel/adam_m, model/transformer/layer_3/ff/layer_1/kernel/adam_v, model/transformer/layer_3/ff/layer_2/bias/adam_m, model/transformer/layer_3/ff/layer_2/bias/adam_v, model/transformer/layer_3/ff/layer_2/kernel/adam_m, model/transformer/layer_3/ff/layer_2/kernel/adam_v, model/transformer/layer_3/rel_attn/LayerNorm/beta/adam_m, model/transformer/layer_3/rel_attn/LayerNorm/beta/adam_v, model/transformer/layer_3/rel_attn/LayerNorm/gamma/adam_m, model/transformer/layer_3/rel_attn/LayerNorm/gamma/adam_v, 
model/transformer/layer_3/rel_attn/k/kernel/adam_m, model/transformer/layer_3/rel_attn/k/kernel/adam_v, model/transformer/layer_3/rel_attn/o/kernel/adam_m, model/transformer/layer_3/rel_attn/o/kernel/adam_v, model/transformer/layer_3/rel_attn/q/kernel/adam_m, model/transformer/layer_3/rel_attn/q/kernel/adam_v, model/transformer/layer_3/rel_attn/r/kernel/adam_m, model/transformer/layer_3/rel_attn/r/kernel/adam_v, model/transformer/layer_3/rel_attn/v/kernel/adam_m, model/transformer/layer_3/rel_attn/v/kernel/adam_v, model/transformer/layer_4/ff/LayerNorm/beta/adam_m, model/transformer/layer_4/ff/LayerNorm/beta/adam_v, model/transformer/layer_4/ff/LayerNorm/gamma/adam_m, model/transformer/layer_4/ff/LayerNorm/gamma/adam_v, model/transformer/layer_4/ff/layer_1/bias/adam_m, model/transformer/layer_4/ff/layer_1/bias/adam_v, model/transformer/layer_4/ff/layer_1/kernel/adam_m, model/transformer/layer_4/ff/layer_1/kernel/adam_v, model/transformer/layer_4/ff/layer_2/bias/adam_m, model/transformer/layer_4/ff/layer_2/bias/adam_v, model/transformer/layer_4/ff/layer_2/kernel/adam_m, model/transformer/layer_4/ff/layer_2/kernel/adam_v, model/transformer/layer_4/rel_attn/LayerNorm/beta/adam_m, model/transformer/layer_4/rel_attn/LayerNorm/beta/adam_v, model/transformer/layer_4/rel_attn/LayerNorm/gamma/adam_m, model/transformer/layer_4/rel_attn/LayerNorm/gamma/adam_v, model/transformer/layer_4/rel_attn/k/kernel/adam_m, model/transformer/layer_4/rel_attn/k/kernel/adam_v, model/transformer/layer_4/rel_attn/o/kernel/adam_m, model/transformer/layer_4/rel_attn/o/kernel/adam_v, model/transformer/layer_4/rel_attn/q/kernel/adam_m, model/transformer/layer_4/rel_attn/q/kernel/adam_v, model/transformer/layer_4/rel_attn/r/kernel/adam_m, model/transformer/layer_4/rel_attn/r/kernel/adam_v, model/transformer/layer_4/rel_attn/v/kernel/adam_m, model/transformer/layer_4/rel_attn/v/kernel/adam_v, model/transformer/layer_5/ff/LayerNorm/beta/adam_m, model/transformer/layer_5/ff/LayerNorm/beta/adam_v, 
model/transformer/layer_5/ff/LayerNorm/gamma/adam_m, model/transformer/layer_5/ff/LayerNorm/gamma/adam_v, model/transformer/layer_5/ff/layer_1/bias/adam_m, model/transformer/layer_5/ff/layer_1/bias/adam_v, model/transformer/layer_5/ff/layer_1/kernel/adam_m, model/transformer/layer_5/ff/layer_1/kernel/adam_v, model/transformer/layer_5/ff/layer_2/bias/adam_m, model/transformer/layer_5/ff/layer_2/bias/adam_v, model/transformer/layer_5/ff/layer_2/kernel/adam_m, model/transformer/layer_5/ff/layer_2/kernel/adam_v, model/transformer/layer_5/rel_attn/LayerNorm/beta/adam_m, model/transformer/layer_5/rel_attn/LayerNorm/beta/adam_v, model/transformer/layer_5/rel_attn/LayerNorm/gamma/adam_m, model/transformer/layer_5/rel_attn/LayerNorm/gamma/adam_v, model/transformer/layer_5/rel_attn/k/kernel/adam_m, model/transformer/layer_5/rel_attn/k/kernel/adam_v, model/transformer/layer_5/rel_attn/o/kernel/adam_m, model/transformer/layer_5/rel_attn/o/kernel/adam_v, model/transformer/layer_5/rel_attn/q/kernel/adam_m, model/transformer/layer_5/rel_attn/q/kernel/adam_v, model/transformer/layer_5/rel_attn/r/kernel/adam_m, model/transformer/layer_5/rel_attn/r/kernel/adam_v, model/transformer/layer_5/rel_attn/v/kernel/adam_m, model/transformer/layer_5/rel_attn/v/kernel/adam_v, model/transformer/layer_6/ff/LayerNorm/beta/adam_m, model/transformer/layer_6/ff/LayerNorm/beta/adam_v, model/transformer/layer_6/ff/LayerNorm/gamma/adam_m, model/transformer/layer_6/ff/LayerNorm/gamma/adam_v, model/transformer/layer_6/ff/layer_1/bias/adam_m, model/transformer/layer_6/ff/layer_1/bias/adam_v, model/transformer/layer_6/ff/layer_1/kernel/adam_m, model/transformer/layer_6/ff/layer_1/kernel/adam_v, model/transformer/layer_6/ff/layer_2/bias/adam_m, model/transformer/layer_6/ff/layer_2/bias/adam_v, model/transformer/layer_6/ff/layer_2/kernel/adam_m, model/transformer/layer_6/ff/layer_2/kernel/adam_v, model/transformer/layer_6/rel_attn/LayerNorm/beta/adam_m, 
model/transformer/layer_6/rel_attn/LayerNorm/beta/adam_v, model/transformer/layer_6/rel_attn/LayerNorm/gamma/adam_m, model/transformer/layer_6/rel_attn/LayerNorm/gamma/adam_v, model/transformer/layer_6/rel_attn/k/kernel/adam_m, model/transformer/layer_6/rel_attn/k/kernel/adam_v, model/transformer/layer_6/rel_attn/o/kernel/adam_m, model/transformer/layer_6/rel_attn/o/kernel/adam_v, model/transformer/layer_6/rel_attn/q/kernel/adam_m, model/transformer/layer_6/rel_attn/q/kernel/adam_v, model/transformer/layer_6/rel_attn/r/kernel/adam_m, model/transformer/layer_6/rel_attn/r/kernel/adam_v, model/transformer/layer_6/rel_attn/v/kernel/adam_m, model/transformer/layer_6/rel_attn/v/kernel/adam_v, model/transformer/layer_7/ff/LayerNorm/beta/adam_m, model/transformer/layer_7/ff/LayerNorm/beta/adam_v, model/transformer/layer_7/ff/LayerNorm/gamma/adam_m, model/transformer/layer_7/ff/LayerNorm/gamma/adam_v, model/transformer/layer_7/ff/layer_1/bias/adam_m, model/transformer/layer_7/ff/layer_1/bias/adam_v, model/transformer/layer_7/ff/layer_1/kernel/adam_m, model/transformer/layer_7/ff/layer_1/kernel/adam_v, model/transformer/layer_7/ff/layer_2/bias/adam_m, model/transformer/layer_7/ff/layer_2/bias/adam_v, model/transformer/layer_7/ff/layer_2/kernel/adam_m, model/transformer/layer_7/ff/layer_2/kernel/adam_v, model/transformer/layer_7/rel_attn/LayerNorm/beta/adam_m, model/transformer/layer_7/rel_attn/LayerNorm/beta/adam_v, model/transformer/layer_7/rel_attn/LayerNorm/gamma/adam_m, model/transformer/layer_7/rel_attn/LayerNorm/gamma/adam_v, model/transformer/layer_7/rel_attn/k/kernel/adam_m, model/transformer/layer_7/rel_attn/k/kernel/adam_v, model/transformer/layer_7/rel_attn/o/kernel/adam_m, model/transformer/layer_7/rel_attn/o/kernel/adam_v, model/transformer/layer_7/rel_attn/q/kernel/adam_m, model/transformer/layer_7/rel_attn/q/kernel/adam_v, model/transformer/layer_7/rel_attn/r/kernel/adam_m, model/transformer/layer_7/rel_attn/r/kernel/adam_v, 
model/transformer/layer_7/rel_attn/v/kernel/adam_m, model/transformer/layer_7/rel_attn/v/kernel/adam_v, model/transformer/layer_8/ff/LayerNorm/beta/adam_m, model/transformer/layer_8/ff/LayerNorm/beta/adam_v, model/transformer/layer_8/ff/LayerNorm/gamma/adam_m, model/transformer/layer_8/ff/LayerNorm/gamma/adam_v, model/transformer/layer_8/ff/layer_1/bias/adam_m, model/transformer/layer_8/ff/layer_1/bias/adam_v, model/transformer/layer_8/ff/layer_1/kernel/adam_m, model/transformer/layer_8/ff/layer_1/kernel/adam_v, model/transformer/layer_8/ff/layer_2/bias/adam_m, model/transformer/layer_8/ff/layer_2/bias/adam_v, model/transformer/layer_8/ff/layer_2/kernel/adam_m, model/transformer/layer_8/ff/layer_2/kernel/adam_v, model/transformer/layer_8/rel_attn/LayerNorm/beta/adam_m, model/transformer/layer_8/rel_attn/LayerNorm/beta/adam_v, model/transformer/layer_8/rel_attn/LayerNorm/gamma/adam_m, model/transformer/layer_8/rel_attn/LayerNorm/gamma/adam_v, model/transformer/layer_8/rel_attn/k/kernel/adam_m, model/transformer/layer_8/rel_attn/k/kernel/adam_v, model/transformer/layer_8/rel_attn/o/kernel/adam_m, model/transformer/layer_8/rel_attn/o/kernel/adam_v, model/transformer/layer_8/rel_attn/q/kernel/adam_m, model/transformer/layer_8/rel_attn/q/kernel/adam_v, model/transformer/layer_8/rel_attn/r/kernel/adam_m, model/transformer/layer_8/rel_attn/r/kernel/adam_v, model/transformer/layer_8/rel_attn/v/kernel/adam_m, model/transformer/layer_8/rel_attn/v/kernel/adam_v, model/transformer/layer_9/ff/LayerNorm/beta/adam_m, model/transformer/layer_9/ff/LayerNorm/beta/adam_v, model/transformer/layer_9/ff/LayerNorm/gamma/adam_m, model/transformer/layer_9/ff/LayerNorm/gamma/adam_v, model/transformer/layer_9/ff/layer_1/bias/adam_m, model/transformer/layer_9/ff/layer_1/bias/adam_v, model/transformer/layer_9/ff/layer_1/kernel/adam_m, model/transformer/layer_9/ff/layer_1/kernel/adam_v, model/transformer/layer_9/ff/layer_2/bias/adam_m, model/transformer/layer_9/ff/layer_2/bias/adam_v, 
model/transformer/layer_9/ff/layer_2/kernel/adam_m, model/transformer/layer_9/ff/layer_2/kernel/adam_v, model/transformer/layer_9/rel_attn/LayerNorm/beta/adam_m, model/transformer/layer_9/rel_attn/LayerNorm/beta/adam_v, model/transformer/layer_9/rel_attn/LayerNorm/gamma/adam_m, model/transformer/layer_9/rel_attn/LayerNorm/gamma/adam_v, model/transformer/layer_9/rel_attn/k/kernel/adam_m, model/transformer/layer_9/rel_attn/k/kernel/adam_v, model/transformer/layer_9/rel_attn/o/kernel/adam_m, model/transformer/layer_9/rel_attn/o/kernel/adam_v, model/transformer/layer_9/rel_attn/q/kernel/adam_m, model/transformer/layer_9/rel_attn/q/kernel/adam_v, model/transformer/layer_9/rel_attn/r/kernel/adam_m, model/transformer/layer_9/rel_attn/r/kernel/adam_v, model/transformer/layer_9/rel_attn/v/kernel/adam_m, model/transformer/layer_9/rel_attn/v/kernel/adam_v, model/transformer/mask_emb/mask_emb/adam_m, model/transformer/mask_emb/mask_emb/adam_v, model/transformer/r_r_bias/adam_m, model/transformer/r_r_bias/adam_v, model/transformer/r_s_bias/adam_m, model/transformer/r_s_bias/adam_v, model/transformer/r_w_bias/adam_m, model/transformer/r_w_bias/adam_v, model/transformer/seg_embed/adam_m, model/transformer/seg_embed/adam_v, model/transformer/word_embedding/lookup_table/adam_m, model/transformer/word_embedding/lookup_table/adam_v\n",
      "Save PyTorch model to /home/husein/xlnet/xlnet-base-bahasa-standard-cased/pytorch_model.bin\n",
      "Save configuration file to /home/husein/xlnet/xlnet-base-bahasa-standard-cased/config.json\r\n"
     ]
    }
   ],
   "source": [
    "!transformers-cli convert --model_type xlnet \\\n",
    "  --tf_checkpoint xlnet-base/model.ckpt-500000 \\\n",
    "  --config xlnet-base/xlnet-base_config.json \\\n",
    "  --pytorch_dump_output xlnet-base-bahasa-standard-cased"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "directory = 'xlnet-base-bahasa-standard-cased'\n",
    "\n",
    "# Read the configuration file written by the conversion step.\n",
    "# `XLNetConfig(...)` takes `vocab_size` as its first positional argument, so\n",
    "# passing the path there does not load the file; `from_json_file` does.\n",
    "config = XLNetConfig.from_json_file(f'{directory}/config.json')\n",
    "\n",
    "# Pin the architecture values explicitly so they cannot drift from the\n",
    "# checkpoint that was converted.\n",
    "config.vocab_size = 32000\n",
    "config.d_inner = 3072\n",
    "config.d_model = 768\n",
    "config.n_head = 12\n",
    "config.n_layer = 12"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "# `AutoModelWithLMHead` is deprecated and emits a FutureWarning; load the\n",
    "# concrete XLNet class with a language-modelling head instead.\n",
    "from transformers import XLNetLMHeadModel\n",
    "\n",
    "model = XLNetLMHeadModel.from_pretrained(\n",
    "    './xlnet-base-bahasa-standard-cased/pytorch_model.bin',\n",
    "    config=config,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Wrap the model and tokenizer from the earlier cells in a fill-mask pipeline.\n",
    "fill_mask = pipeline(\n",
    "    'fill-mask',\n",
    "    model=model,\n",
    "    tokenizer=tokenizer,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[{'sequence': 'makan ayam dengan.<sep><cls>',\n",
       "  'score': 0.18475300073623657,\n",
       "  'token': 9,\n",
       "  'token_str': '.'},\n",
       " {'sequence': 'makan ayam dengan<eod><sep><cls>',\n",
       "  'score': 0.179636612534523,\n",
       "  'token': 7,\n",
       "  'token_str': '<eod>'},\n",
       " {'sequence': 'makan ayam dengan <sep><cls>',\n",
       "  'score': 0.14987488090991974,\n",
       "  'token': 19,\n",
       "  'token_str': '▁'},\n",
       " {'sequence': 'makan ayam dengannya<sep><cls>',\n",
       "  'score': 0.11687928438186646,\n",
       "  'token': 26,\n",
       "  'token_str': 'nya'},\n",
       " {'sequence': 'makan ayam dengan,<sep><cls>',\n",
       "  'score': 0.05017939582467079,\n",
       "  'token': 21,\n",
       "  'token_str': ','}]"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Smoke test: ask the pipeline to complete the `<mask>` placeholder that\n",
    "# ends the sentence and display the ranked candidates.\n",
    "fill_mask('makan ayam dengan <mask>')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Save the model into the same directory the tokenizer files were saved to.\n",
    "model.save_pretrained(\n",
    "    'xlnet-base-bahasa-standard-cased'\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# !transformers-cli upload ./xlnet-base-bahasa-standard-cased"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the bare XLNet encoder (`XLNetModel`) by name; this rebinds `model`.\n",
    "# NOTE(review): the id here is 'xlnet-base-bahasa-cased', while the directory\n",
    "# exported earlier in this notebook is 'xlnet-base-bahasa-standard-cased' --\n",
    "# confirm which repository is intended.\n",
    "model = XLNetModel.from_pretrained(\n",
    "    'huseinzol05/xlnet-base-bahasa-cased'\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the tokenizer under the same identifier as the model; input casing is\n",
    "# preserved (no lower-casing).\n",
    "tokenizer = XLNetTokenizer.from_pretrained(\n",
    "    'huseinzol05/xlnet-base-bahasa-cased',\n",
    "    do_lower_case=False,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import torch"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Encode one sentence (special tokens included) and nest it in a list so the\n",
    "# tensor is built from a single-row batch.\n",
    "input_ids = torch.tensor(\n",
    "    [tokenizer.encode(\"husein tk suka mkan ayam\", add_special_tokens=True)]\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Inference only: run the forward pass with gradient tracking disabled and\n",
    "# keep the first element of the model output.\n",
    "with torch.no_grad():\n",
    "    last_hidden_states = model(input_ids)[0]\n",
    "\n",
    "last_hidden_states"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# `AutoModelWithLMHead` is deprecated; use the concrete XLNet class with a\n",
    "# language-modelling head. This rebinds `model` (previously the bare encoder).\n",
    "from transformers import XLNetLMHeadModel\n",
    "\n",
    "model = XLNetLMHeadModel.from_pretrained('huseinzol05/xlnet-base-bahasa-cased')\n",
    "\n",
    "# Rebuild the fill-mask pipeline around the reloaded model and query it.\n",
    "fill_mask = pipeline('fill-mask', model=model, tokenizer=tokenizer)\n",
    "fill_mask('makan ayam dengan <mask>')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.8"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
